setwd(here::here("Final_Analysis", "BJPS Submission", "Replication"))
### Issues

#library(readr)
library(tidyverse)
library(modelsummary)
library(gt)
library(gtExtras)
library(gtsummary)
library(randomForest)
library(modelr)
library(broom)
library(marginaleffects)
library(cowplot)
library(ggpubr) # for theme_pubr()
library(estimatr) # for lm_robust()
library(mgcv)
library(GGally)


model_data <- read_csv("Data/incumbent_data.csv") |>
  mutate(year_factor = factor(year))

model_data |>
  filter(tweets > 25) |>
  mutate(Party = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  group_by(year, Party) |>
  summarize(
    `Twitter Mean` = mean(Twitter_Ideo),
    `Twitter SD` = sd(Twitter_Ideo),
    `Voter Mean` = mean(mean),
    `Voter SD` = sd(mean),
    `Leg Mean` = mean(nokken_poole_dim1),
    `Leg SD` = sd(nokken_poole_dim1)
  ) |>
  ungroup() |>
  pivot_wider(
    id_cols = "year",
    names_from = "Party",
    values_from = c(
      "Twitter Mean",
      "Twitter SD",
      "Voter Mean",
      "Voter SD",
      "Leg Mean",
      "Leg SD"
    ),
    names_glue = "{Party}_{.value}"
  ) |>
  gt() |>
  cols_merge(
    c(`Democratic_Twitter Mean`, `Democratic_Twitter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_Twitter Mean`, `Republican_Twitter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Democratic_Voter Mean`, `Democratic_Voter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_Voter Mean`, `Republican_Voter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Democratic_Leg Mean`, `Democratic_Leg SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_Leg Mean`, `Republican_Leg SD`),
    pattern = "{1} ({2})"
  ) |>
  tab_spanner(columns = starts_with("Democratic_"), label = "Democratic") |>
  tab_spanner(columns = starts_with("Republican_"), label = "Republican") |>
  cols_label_with(fn = function(x) {
    gsub("Democratic_|Republican_| Mean", "", x)
  }) |>
  fmt_number(columns = -1) |>
  gtsave("Out/main_text_summary.tex")


gt <- model_data |>
  filter(tweets > 25) |>
  mutate(Democrat = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  mutate(seat = ifelse(seat == "federal:house", "House", "Senate")) |>
  select(
    Democrat,
    seat,
    mean,
    Twitter_Ideo,
    nokken_poole_dim1,
    PVI,
    tweets,
    fb_ideo_avg,
    Posts,
    web_ideology,
    ggum_ideo
  ) |>
  tbl_summary(
    by = Democrat,
    label = list(
      mean ~ "Perceived Ideology",
      Twitter_Ideo ~ "Twitter Ideology",
      tweets ~ "Number of Tweets",
      fb_ideo_avg ~ "FB Ideology",
      Posts ~ "Number of FB posts",
      web_ideology ~ "Website Ideo",
      nokken_poole_dim1 ~ "Nokken-Poole",
      ggum_ideo ~ "GGUM Ideology",
      seat ~ "Chamber"
    ),
    missing_text = "Missing"
  )

gtsave(as_gt(gt), "Out/incumbent_summary_chamber.tex")

gt <- model_data |>
  filter(tweets > 25) |>
  mutate(Democrat = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  mutate(seat = ifelse(seat == "federal:house", "House", "Senate")) |>
  select(
    Democrat,
    seat,
    mean,
    Twitter_Ideo,
    nokken_poole_dim1,
    PVI,
    tweets,
    fb_ideo_avg,
    Posts,
    web_ideology,
    ggum_ideo
  ) |>
  tbl_summary(
    by = seat,
    label = list(
      mean ~ "Perceived Ideology",
      Twitter_Ideo ~ "Twitter Ideology",
      tweets ~ "Number of Tweets",
      fb_ideo_avg ~ "FB Ideology",
      Posts ~ "Number of FB posts",
      web_ideology ~ "Website Ideo",
      nokken_poole_dim1 ~ "Nokken-Poole",
      ggum_ideo ~ "GGUM Ideology",
      Democrat ~ "Party"
    ),
    missing_text = "Missing"
  )

gtsave(as_gt(gt), "Out/incumbent_summary_party.tex")

cor(
  model_data$Twitter_Ideo,
  model_data$ideo_wout_classifier,
  use = "complete.obs"
)

#### Pooled Models - Incumbents ####

controls = ~ . + I(PVI / 100) + I((PVI / 100)^2)

mod_all <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = tweets > 25,
  clusters = Candidate
)
mod_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)
mod_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "No" & tweets > 25,
  clusters = Candidate
)

mod_hou <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & tweets > 25,
  clusters = Candidate
)
mod_sen <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & tweets > 25,
  clusters = Candidate
)

mod_hou_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)
mod_sen_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)

mod_hou_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "No" & tweets > 25,
  clusters = Candidate
)
mod_sen_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "No" & tweets > 25,
  clusters = Candidate
)

summaries <- lapply(
  list(
    "All_Both" = mod_all,
    "Republican_Both" = mod_reps,
    "Democratic_Both" = mod_dems,
    "All_Senate" = mod_sen,
    "Republican_Senate" = mod_sen_reps,
    "Democratic_Senate" = mod_sen_dems,
    "All_House" = mod_hou,
    "Republican_House" = mod_hou_reps,
    "Democratic_House" = mod_hou_dems
  ),
  tidy,
  conf.int = T
) |>
  list_rbind(names_to = "id")

summaries |>
  separate_wider_delim(id, delim = "_", names = c("Party", "Chamber")) |>
  filter(term %in% c("Twitter_Ideo", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "Twitter_Ideo",
      "Messaging Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Chamber, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = 1:3, linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)
  ) +
  labs(
    y = "Coefficient",
    x = "",
    caption = "Includes year fixed effects and party indicator for models with multiple parties."
  )

ggsave(
  "Out/coefficient_inc_pooled.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)

models <- list(
  "All_Both" = mod_all,
  "Republican_Both" = mod_reps,
  "Democratic_Both" = mod_dems,
  "All_Senate" = mod_sen,
  "Republican_Senate" = mod_sen_reps,
  "Democratic_Senate" = mod_sen_dems,
  "All_House" = mod_hou,
  "Republican_House" = mod_hou_reps,
  "Democratic_House" = mod_hou_dems
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gt <- gt |>
  tab_spanner(label = "Both", columns = All_Both:Democratic_Both) |>
  tab_spanner(label = "Senate", columns = All_Senate:Democratic_Senate) |>
  tab_spanner(label = "House", columns = All_House:Democratic_House) |>
  cols_label_with(fn = function(x) gsub("_.*", "", x)) |>
  gt_duplicate_column(` `, 1, after = 7) |>
  gt_split(col_slice_at = 7)


gtsave(gt, "Out/coefficient_inc_pooled.tex")

#### Robustness: With Categories #####

controls_issues = ~ . +
  I(PVI / 100) +
  I((PVI / 100)^2) +
  Macroeconomics +
  Campaigning +
  Health +
  `Civil Rights` +
  Defense +
  Immigration

mod_all <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = tweets > 25,
  clusters = Candidate
)
mod_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)
mod_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = Democrat == "No" & tweets > 25,
  clusters = Candidate
)

mod_hou <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:house" & tweets > 25,
  clusters = Candidate
)
mod_sen <- lm_robust(
  update.formula(
    mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:senate" & tweets > 25,
  clusters = Candidate
)

mod_hou_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)
mod_sen_dems <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "Yes" & tweets > 25,
  clusters = Candidate
)

mod_hou_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "No" & tweets > 25,
  clusters = Candidate
)
mod_sen_reps <- lm_robust(
  update.formula(
    mean ~ Twitter_Ideo + nokken_poole_dim1 + year_factor,
    controls_issues
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "No" & tweets > 25,
  clusters = Candidate
)

summaries <- lapply(
  list(
    "All_Both" = mod_all,
    "Republican_Both" = mod_reps,
    "Democratic_Both" = mod_dems,
    "All_Senate" = mod_sen,
    "Republican_Senate" = mod_sen_reps,
    "Democratic_Senate" = mod_sen_dems,
    "All_House" = mod_hou,
    "Republican_House" = mod_hou_reps,
    "Democratic_House" = mod_hou_dems
  ),
  tidy,
  conf.int = T
) |>
  list_rbind(names_to = "id")

models <- list(
  "All_Both" = mod_all,
  "Republican_Both" = mod_reps,
  "Democratic_Both" = mod_dems,
  "All_Senate" = mod_sen,
  "Republican_Senate" = mod_sen_reps,
  "Democratic_Senate" = mod_sen_dems,
  "All_House" = mod_hou,
  "Republican_House" = mod_hou_reps,
  "Democratic_House" = mod_hou_dems
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat",
    "Macroeconomics" = "Macroeconomics",
    "Campaigning" = "Campaigning",
    "Health" = "Health",
    "Civil Rights" = "Civil Rights",
    "Defense" = "Defense",
    "Immigration" = "Immigration",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gt <- gt |>
  tab_spanner(label = "Both", columns = All_Both:Democratic_Both) |>
  tab_spanner(label = "Senate", columns = All_Senate:Democratic_Senate) |>
  tab_spanner(label = "House", columns = All_House:Democratic_House) |>
  cols_label_with(fn = function(x) gsub("_.*", "", x)) |>
  gt_duplicate_column(` `, 1, after = 7) |>
  gt_split(col_slice_at = 7)


gtsave(gt, "Out/coefficient_inc_pooled_llama.tex")


##### Robustness: With Web Ideology #####

mod_all <- lm_robust(
  update.formula(
    mean ~ Democrat + web_ideology + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  clusters = Candidate
)
mod_dems <- lm_robust(
  update.formula(
    mean ~ web_ideology + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "Yes",
  clusters = Candidate
)
mod_reps <- lm_robust(
  update.formula(
    mean ~ web_ideology + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "No",
  clusters = Candidate
)


models <- list(
  "All" = mod_all,
  "Republican" = mod_reps,
  "Democratic" = mod_dems
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "web_ideology" = "Messaging Ideology (Web)",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gt <- gt |>
  cols_label_with(fn = function(x) gsub("_.*", "", x))


gtsave(gt, "Out/coefficient_inc_pooled_web.tex")


##### Robustness: With FB Ideology #####

mod_all <- lm_robust(
  update.formula(
    mean ~ Democrat + fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Posts > 25,
  clusters = Candidate
)
mod_dems <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "Yes" & Posts > 25,
  clusters = Candidate
)
mod_reps <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "No" & Posts > 25,
  clusters = Candidate
)

mod_hou <- lm_robust(
  update.formula(
    mean ~ Democrat + fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & Posts > 25,
  clusters = Candidate
)
mod_sen <- lm_robust(
  update.formula(
    mean ~ Democrat + fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & Posts > 25,
  clusters = Candidate
)

mod_hou_dems <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "Yes" & Posts > 25,
  clusters = Candidate
)
mod_sen_dems <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "Yes" & Posts > 25,
  clusters = Candidate
)

mod_hou_reps <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & Democrat == "No" & Posts > 25,
  clusters = Candidate
)
mod_sen_reps <- lm_robust(
  update.formula(
    mean ~ fb_ideo_avg + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & Democrat == "No" & Posts > 25,
  clusters = Candidate
)

summaries <- lapply(
  list(
    "All_Both" = mod_all,
    "Republican_Both" = mod_reps,
    "Democratic_Both" = mod_dems,
    "All_Senate" = mod_sen,
    "Republican_Senate" = mod_sen_reps,
    "Democratic_Senate" = mod_sen_dems,
    "All_House" = mod_hou,
    "Republican_House" = mod_hou_reps,
    "Democratic_House" = mod_hou_dems
  ),
  tidy,
  conf.int = T
) |>
  list_rbind(names_to = "id")

summaries |>
  separate_wider_delim(id, delim = "_", names = c("Party", "Chamber")) |>
  filter(term %in% c("fb_ideo_avg", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "fb_ideo_avg",
      "Facebook Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Chamber, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = 1:3, linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)
  ) +
  labs(
    y = "Coefficient",
    x = "",
    caption = "Includes year fixed effects and party indicator for models with multiple parties."
  )

ggsave(
  "Out/coefficient_inc_pooled_facebook.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)

models <- list(
  "All_Both" = mod_all,
  "Republican_Both" = mod_reps,
  "Democratic_Both" = mod_dems,
  "All_Senate" = mod_sen,
  "Republican_Senate" = mod_sen_reps,
  "Democratic_Senate" = mod_sen_dems,
  "All_House" = mod_hou,
  "Republican_House" = mod_hou_reps,
  "Democratic_House" = mod_hou_dems
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "fb_ideo_avg" = "Messaging Ideology (FB)",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gt <- gt |>
  tab_spanner(label = "Both", columns = All_Both:Democratic_Both) |>
  tab_spanner(label = "Senate", columns = All_Senate:Democratic_Senate) |>
  tab_spanner(label = "House", columns = All_House:Democratic_House) |>
  cols_label_with(fn = function(x) gsub("_.*", "", x)) |>
  gt_duplicate_column(` `, 1, after = 7) |>
  gt_split(col_slice_at = 7)


gtsave(gt, "Out/coefficient_inc_pooled_facebook.tex")


##### Robustness: Without Classifier #####

mod_all <- lm_robust(
  update.formula(
    mean ~ Democrat + ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = tweets_wout_classifier > 25,
  clusters = Candidate
)
mod_dems <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "Yes" & tweets_wout_classifier > 25,
  clusters = Candidate
)
mod_reps <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = Democrat == "No" & tweets_wout_classifier > 25,
  clusters = Candidate
)

mod_hou <- lm_robust(
  update.formula(
    mean ~ Democrat + ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" & tweets_wout_classifier > 25,
  clusters = Candidate
)
mod_sen <- lm_robust(
  update.formula(
    mean ~ Democrat + ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" & tweets_wout_classifier > 25,
  clusters = Candidate
)

mod_hou_dems <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" &
    Democrat == "Yes" &
    tweets_wout_classifier > 25,
  clusters = Candidate
)
mod_sen_dems <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" &
    Democrat == "Yes" &
    tweets_wout_classifier > 25,
  clusters = Candidate
)

mod_hou_reps <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:house" &
    Democrat == "No" &
    tweets_wout_classifier > 25,
  clusters = Candidate
)
mod_sen_reps <- lm_robust(
  update.formula(
    mean ~ ideo_wout_classifier + nokken_poole_dim1 + year_factor,
    controls
  ),
  data = model_data,
  subset = seat == "federal:senate" &
    Democrat == "No" &
    tweets_wout_classifier > 25,
  clusters = Candidate
)

summaries <- lapply(
  list(
    "All_Both" = mod_all,
    "Republican_Both" = mod_reps,
    "Democratic_Both" = mod_dems,
    "All_Senate" = mod_sen,
    "Republican_Senate" = mod_sen_reps,
    "Democratic_Senate" = mod_sen_dems,
    "All_House" = mod_hou,
    "Republican_House" = mod_hou_reps,
    "Democratic_House" = mod_hou_dems
  ),
  tidy,
  conf.int = T
) |>
  list_rbind(names_to = "id")

summaries |>
  separate_wider_delim(id, delim = "_", names = c("Party", "Chamber")) |>
  filter(term %in% c("ideo_wout_classifier", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "ideo_wout_classifier",
      "Facebook Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Chamber, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = 1:2, linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1)
  ) +
  labs(
    y = "Coefficient",
    x = "",
    caption = "Includes year fixed effects and party indicator for models with multiple parties."
  )

ggsave(
  "Out/coefficient_inc_pooled_tw_no_classifier.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)

models <- list(
  "All_Both" = mod_all,
  "Republican_Both" = mod_reps,
  "Democratic_Both" = mod_dems,
  "All_Senate" = mod_sen,
  "Republican_Senate" = mod_sen_reps,
  "Democratic_Senate" = mod_sen_dems,
  "All_House" = mod_hou,
  "Republican_House" = mod_hou_reps,
  "Democratic_House" = mod_hou_dems
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "ideo_wout_classifier" = "Messaging Ideology (TW w/out classifier)",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gt <- gt |>
  tab_spanner(label = "Both", columns = All_Both:Democratic_Both) |>
  tab_spanner(label = "Senate", columns = All_Senate:Democratic_Senate) |>
  tab_spanner(label = "House", columns = All_House:Democratic_House) |>
  cols_label_with(fn = function(x) gsub("_.*", "", x)) |>
  gt_duplicate_column(` `, 1, after = 7) |>
  gt_split(col_slice_at = 7)


gtsave(gt, "Out/coefficient_inc_pooled_tw_no_classifier.tex")


#### Year Models - Incumbents ####

all <- model_data |>
  filter(year > 2010 & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(
        mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1,
        controls
      ),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2022, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(year > 2010 & Democrat == "Yes" & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(dems) <- as.character(seq(2012, 2022, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(year > 2010 & Democrat == "No" & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(reps) <- as.character(seq(2012, 2022, by = 2))
models[["Reps"]] <- reps

reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")

comp_beta <- function(data, years, term) {
  b1 <- data$estimate[data$Year == years[1] & data$term == term]
  b2 <- data$estimate[data$Year == years[2] & data$term == term]
  se_b1 <- data$std.error[data$Year == years[1] & data$term == term]
  se_b2 <- data$std.error[data$Year == years[2] & data$term == term]

  z_stat <- abs((b1 - b2) / sqrt(se_b1^2 + se_b2^2))
  p_val <- pnorm(z_stat, lower.tail = F) * 2
  return(list("Z Stat" = z_stat, "P Value" = p_val))
}

cat("#####################################################")
cat("Yearly Estimates - Base")
cat("#####################################################")

all |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")
comp_beta(data = all, years = c(2012, 2022), term = "nokken_poole_dim1")

all |> filter(Year %in% c(2012, 2022) & term == "Twitter_Ideo")
comp_beta(data = all, years = c(2012, 2022), term = "Twitter_Ideo")


comp_beta(data = reps, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = reps, years = c(2012, 2022), term = "Twitter_Ideo")

comp_beta(data = dems, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = dems, years = c(2012, 2022), term = "Twitter_Ideo")
dems |> filter(Year %in% c(2012, 2022) & term == "Twitter_Ideo")
dems |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")

list_rbind(list(all, reps, dems)) |>
  filter(term %in% c("Twitter_Ideo", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "Twitter_Ideo",
      "Messaging Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Year, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = 1:3, linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5)
  ) +
  labs(y = "Coefficient", x = "")

ggsave(
  "Out/coefficient_inc_years.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years.tex")


##### Robust: Llama Categories ####

all <- model_data |>
  filter(year > 2010 & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(
        mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1,
        controls_issues
      ),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2022, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(year > 2010 & Democrat == "Yes" & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls_issues),
      data = .x
    )
  )
names(dems) <- as.character(seq(2012, 2022, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(year > 2010 & Democrat == "No" & tweets > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls_issues),
      data = .x
    )
  )
names(reps) <- as.character(seq(2012, 2022, by = 2))
models[["Reps"]] <- reps

reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "Macroeconomics" = "Macroeconomics",
    "Campaigning" = "Campaigning",
    "Health" = "Health",
    "`Civil Rights`" = "Civil Rights",
    "Defense" = "Defense",
    "Immigration" = "Immigration",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years_llama.tex")


##### Robust: GGUM Ideology #####

all <- model_data |>
  filter(year > 2010 & year < 2022 & tweets > 25 & seat == "federal:house") |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Democrat + Twitter_Ideo + ggum_ideo, controls),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2020, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(
    year > 2010 &
      year < 2022 &
      Democrat == "Yes" &
      tweets > 25 &
      seat == "federal:house"
  ) |>
  group_by(year) |>
  group_map(
    ~ lm(update.formula(mean ~ Twitter_Ideo + ggum_ideo, controls), data = .x)
  )
names(dems) <- as.character(seq(2012, 2020, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(
    year > 2010 &
      year < 2022 &
      Democrat == "No" &
      tweets > 25 &
      seat == "federal:house"
  ) |>
  group_by(year) |>
  group_map(
    ~ lm(update.formula(mean ~ Twitter_Ideo + ggum_ideo, controls), data = .x)
  )
names(reps) <- as.character(seq(2012, 2020, by = 2))
models[["Reps"]] <- reps


reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")

list_rbind(list(all, reps, dems)) |>
  filter(term %in% c("Twitter_Ideo", "ggum_ideo")) |>
  mutate(
    term = ifelse(
      term == "Twitter_Ideo",
      "Twitter Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Year, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(
    yintercept = seq(0, 1.5, .5),
    linetype = "dotted",
    color = "gray"
  ) +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5)
  ) +
  labs(y = "Coefficient", x = "")

ggsave(
  "Out/coefficient_inc_years_ggum.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "ggum_ideo" = "Legisilative Ideology (GGUM)",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years_ggum.tex")


##### Robust: FB Ideology #####

all <- model_data |>
  filter(year > 2010 & Posts > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(
        mean ~ Democrat + fb_ideo_avg + nokken_poole_dim1,
        controls
      ),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2022, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(year > 2010 & Democrat == "Yes" & Posts > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ fb_ideo_avg + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(dems) <- as.character(seq(2012, 2022, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(year > 2010 & Democrat == "No" & Posts > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ fb_ideo_avg + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(reps) <- as.character(seq(2012, 2022, by = 2))
models[["Reps"]] <- reps

reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")


list_rbind(list(all, reps, dems)) |>
  filter(term %in% c("fb_ideo_avg", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "fb_ideo_avg",
      "Facebook Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Year, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = seq(1, 3, 1), linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5)
  ) +
  labs(y = "Coefficient", x = "")

ggsave(
  "Out/coefficient_inc_years_facebook.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "fb_ideo_avg" = "Messaging Ideology (FB)",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years_facebook.tex")


#### Robust - Standardized Coefficients ####

all <- model_data |>
  filter(year > 2010 & tweets > 25) |>
  group_by(year) |>
  mutate(
    mean = scale(mean),
    Twitter_Ideo = scale(Twitter_Ideo),
    nokken_poole_dim1 = scale(nokken_poole_dim1)
  ) |>
  group_map(
    ~ lm(
      update.formula(
        mean ~ Democrat + Twitter_Ideo + nokken_poole_dim1,
        controls
      ),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2022, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(year > 2010 & Democrat == "Yes" & tweets > 25) |>
  group_by(year) |>
  mutate(
    mean = scale(mean),
    Twitter_Ideo = scale(Twitter_Ideo),
    nokken_poole_dim1 = scale(nokken_poole_dim1)
  ) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(dems) <- as.character(seq(2012, 2022, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(year > 2010 & Democrat == "No" & tweets > 25) |>
  group_by(year) |>
  mutate(
    mean = scale(mean),
    Twitter_Ideo = scale(Twitter_Ideo),
    nokken_poole_dim1 = scale(nokken_poole_dim1)
  ) |>
  group_map(
    ~ lm(
      update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(reps) <- as.character(seq(2012, 2022, by = 2))
models[["Reps"]] <- reps

reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")

comp_beta <- function(data, years, term) {
  b1 <- data$estimate[data$Year == years[1] & data$term == term]
  b2 <- data$estimate[data$Year == years[2] & data$term == term]
  se_b1 <- data$std.error[data$Year == years[1] & data$term == term]
  se_b2 <- data$std.error[data$Year == years[2] & data$term == term]

  z_stat <- abs((b1 - b2) / sqrt(se_b1^2 + se_b2^2))
  p_val <- pnorm(z_stat, lower.tail = F) * 2
  return(list("Z Stat" = z_stat, "P Value" = p_val))
}

cat("#####################################################")
cat("Yearly Estimates - Base")
cat("#####################################################")

all |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")
comp_beta(data = all, years = c(2012, 2022), term = "nokken_poole_dim1")

all |> filter(Year %in% c(2012, 2022) & term == "Twitter_Ideo")
comp_beta(data = all, years = c(2012, 2022), term = "Twitter_Ideo")


comp_beta(data = reps, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = reps, years = c(2012, 2022), term = "Twitter_Ideo")

comp_beta(data = dems, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = dems, years = c(2012, 2022), term = "Twitter_Ideo")
dems |> filter(Year %in% c(2012, 2022) & term == "Twitter_Ideo")
dems |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")

list_rbind(list(all, reps, dems)) |>
  filter(term %in% c("Twitter_Ideo", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "Twitter_Ideo",
      "Messaging Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Year, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = seq(0, 1, .25), linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5)
  ) +
  labs(y = "Coefficient", x = "")

ggsave(
  "Out/coefficient_inc_years_standardized.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideo (Standardized)",
    "nokken_poole_dim1" = "Legislative Ideo (Standardized)",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years_standardized.tex")


#### Robust - W/out Classifier ####

all <- model_data |>
  filter(year > 2010 & tweets_wout_classifier > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(
        mean ~ Democrat + ideo_wout_classifier + nokken_poole_dim1,
        controls
      ),
      data = .x
    )
  )

names(all) <- as.character(seq(2012, 2022, by = 2))
models <- list("All" = all)

all <- lapply(all, broom::tidy, conf.int = T)

dems <- model_data |>
  filter(year > 2010 & Democrat == "Yes" & tweets_wout_classifier > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ ideo_wout_classifier + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(dems) <- as.character(seq(2012, 2022, by = 2))
models[["Dems"]] <- dems

dems <- lapply(dems, broom::tidy, conf.int = T)


reps <- model_data |>
  filter(year > 2010 & Democrat == "No" & tweets_wout_classifier > 25) |>
  group_by(year) |>
  group_map(
    ~ lm(
      update.formula(mean ~ ideo_wout_classifier + nokken_poole_dim1, controls),
      data = .x
    )
  )
names(reps) <- as.character(seq(2012, 2022, by = 2))
models[["Reps"]] <- reps

reps <- lapply(reps, broom::tidy, conf.int = T)


all <- list_rbind(all, names_to = "Year") |> mutate("Party" = "All")
reps <- list_rbind(reps, names_to = "Year") |> mutate("Party" = "Republican")
dems <- list_rbind(dems, names_to = "Year") |> mutate("Party" = "Democratic")

comp_beta <- function(data, years, term) {
  b1 <- data$estimate[data$Year == years[1] & data$term == term]
  b2 <- data$estimate[data$Year == years[2] & data$term == term]
  se_b1 <- data$std.error[data$Year == years[1] & data$term == term]
  se_b2 <- data$std.error[data$Year == years[2] & data$term == term]

  z_stat <- abs((b1 - b2) / sqrt(se_b1^2 + se_b2^2))
  p_val <- pnorm(z_stat, lower.tail = F) * 2
  return(list("Z Stat" = z_stat, "P Value" = p_val))
}

cat("#####################################################")
cat("Yearly Estimates - Base")
cat("#####################################################")

all |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")
comp_beta(data = all, years = c(2012, 2022), term = "nokken_poole_dim1")

all |> filter(Year %in% c(2012, 2022) & term == "ideo_wout_classifier")
comp_beta(data = all, years = c(2012, 2022), term = "ideo_wout_classifier")


comp_beta(data = reps, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = reps, years = c(2012, 2022), term = "ideo_wout_classifier")

comp_beta(data = dems, years = c(2012, 2022), term = "nokken_poole_dim1")
comp_beta(data = dems, years = c(2012, 2022), term = "ideo_wout_classifier")
dems |> filter(Year %in% c(2012, 2022) & term == "Twitter_Ideo")
dems |> filter(Year %in% c(2012, 2022) & term == "nokken_poole_dim1")

list_rbind(list(all, reps, dems)) |>
  filter(term %in% c("ideo_wout_classifier", "nokken_poole_dim1")) |>
  mutate(
    term = ifelse(
      term == "ideo_wout_classifier",
      "Messaging Ideology",
      "Legislative Ideology"
    )
  ) |>
  ggplot(aes(x = Year, y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange(aes(color = p.value < 0.05, shape = p.value < 0.05)) +
  facet_grid(term ~ Party) +
  theme_pubr() +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_shape("p < 0.05", labels = c("TRUE" = "Yes", "FALSE" = "No")) +
  geom_hline(yintercept = 0, color = "gray") +
  geom_hline(yintercept = 1:3, linetype = "dotted", color = "gray") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 90, vjust = .5)
  ) +
  labs(y = "Coefficient", x = "")

ggsave(
  "Out/coefficient_inc_years_tw_no_classifier.png",
  bg = "white",
  height = 6,
  width = 6,
  dpi = 600
)


gt <- modelsummary(
  models,
  coef_map = c(
    "(Intercept)" = "Intercept",
    "ideo_wout_classifier" = "Messaging Ideology",
    "nokken_poole_dim1" = "Legislative Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "DemocratYes" = "Democrat"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01),
  shape = "rbind"
)

gtsave(gt, "Out/coefficient_inc_years_tw_no_classifier.tex")


#### Cross Validation- RF  ####

cv <- function(data, folds = 10, n = 100, formula = mean ~ 1) {
  set.seed(1)
  complete_data <- data |>
    drop_na(Twitter_Ideo, nokken_poole_dim1) |>
    filter(tweets > 25)
  cv_data <- complete_data |> crossv_mc(n = n)
  out <- map(
    cv_data$train,
    ~ randomForest(formula, data = ., na.action = na.omit)
  ) |>
    map2_dbl(cv_data$test, mae)
  tibble("err" = out)
}

controls <- ~ . + PVI
formulas <- list(
  "Base" = update.formula(mean ~ 1, controls),
  "Only Messaging" = update.formula(mean ~ Twitter_Ideo, controls),
  "Only Nokken-Poole" = update.formula(mean ~ nokken_poole_dim1, controls),
  "Full" = update.formula(mean ~ Twitter_Ideo + nokken_poole_dim1, controls)
)

all <- NULL
for (ii in 2:length(formulas)) {
  form = formulas[[ii]]
  cv_all <- model_data |>
    filter(year > 2010) |>
    group_by(year) |>
    group_map(~ cv(data = ., formula = update(form, . ~ . + Democrat)))
  names(cv_all) <- as.character(seq(2012, 2022, by = 2))
  cv_all <- list_rbind(cv_all, names_to = "Year") |> mutate("Party" = "All")

  cv_dem <- model_data |>
    filter(year > 2010 & Democrat == "Yes") |>
    group_by(year) |>
    group_map(~ cv(data = ., formula = form))
  names(cv_dem) <- as.character(seq(2012, 2022, by = 2))
  cv_dem <- list_rbind(cv_dem, names_to = "Year") |>
    mutate("Party" = "Democratic")

  cv_rep <- model_data |>
    filter(year > 2010 & Democrat == "No") |>
    group_by(year) |>
    group_map(~ cv(data = ., formula = form))
  names(cv_rep) <- as.character(seq(2012, 2022, by = 2))
  cv_rep <- list_rbind(cv_rep, names_to = "Year") |>
    mutate("Party" = "Republican")
  out <- list_rbind(list(cv_all, cv_rep, cv_dem))

  out$Model <- names(formulas)[ii]
  all <- rbind(out, all)
}


all |>
  mutate(
    Model = factor(
      Model,
      levels = c("Base", "Only Nokken-Poole", "Only Messaging", "Full")
    )
  ) |>

  ggplot(aes(x = Year, y = err, fill = Model)) +
  geom_boxplot() +
  facet_wrap(~Party) +
  theme_pubr() +
  # scale_color_brewer(type = "qual") +
  scale_fill_brewer(type = "qual") +
  theme(legend.position = "bottom") +
  labs(y = "Mean Absolute Error", x = "")


ggsave(
  "Out/random_forest_mae.png",
  bg = "white",
  height = 6,
  width = 10,
  dpi = 600
)


#### Candidates ####

model_data <- read_csv("Data/candidate_data.csv") |>
  mutate(year_factor = factor(year))


cor(
  model_data$Twitter_Ideo,
  model_data$ideo_wout_classifier,
  use = "complete.obs"
)

model_data |>
  filter(tweets > 25 & year > 2018) |>
  mutate(Party = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  drop_na(Party) |>
  group_by(year, Party) |>
  summarize(
    `Twitter Mean` = mean(Twitter_Ideo),
    `Twitter SD` = sd(Twitter_Ideo),
    `Voter Mean` = mean(mean),
    `Voter SD` = sd(mean),
    `PVI Mean` = mean(PVI / 100),
    `PVI SD` = sd(PVI / 100)
  ) |>
  ungroup() |>
  pivot_wider(
    id_cols = "year",
    names_from = "Party",
    values_from = c(
      "Twitter Mean",
      "Twitter SD",
      "Voter Mean",
      "Voter SD",
      "PVI Mean",
      "PVI SD"
    ),
    names_glue = "{Party}_{.value}"
  ) |>
  gt() |>
  cols_merge(
    c(`Democratic_Twitter Mean`, `Democratic_Twitter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_Twitter Mean`, `Republican_Twitter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Democratic_Voter Mean`, `Democratic_Voter SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_Voter Mean`, `Republican_Voter SD`),
    pattern = "{1} ({2})"
  ) |>

  cols_merge(
    c(`Democratic_PVI Mean`, `Democratic_PVI SD`),
    pattern = "{1} ({2})"
  ) |>
  cols_merge(
    c(`Republican_PVI Mean`, `Republican_PVI SD`),
    pattern = "{1} ({2})"
  ) |>
  tab_spanner(columns = starts_with("Democratic_"), label = "Democratic") |>
  tab_spanner(columns = starts_with("Republican_"), label = "Republican") |>
  cols_label_with(fn = function(x) {
    gsub("Democratic_|Republican_| Mean", "", x)
  }) |>
  fmt_number(columns = -1) |>
  gtsave("Out/main_text_summary_candidates.tex")

gt <- model_data |>
  filter(tweets > 25 & year > 2018) |>
  mutate(Democrat = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  drop_na(Democrat) |>
  mutate(seat = ifelse(seat == "federal:house", "House", "Senate")) |>
  select(Democrat, seat, mean, Twitter_Ideo, PVI, tweets, web_ideology) |>
  tbl_summary(
    by = Democrat,
    label = list(
      mean ~ "Perceived Ideology",
      Twitter_Ideo ~ "Twitter Ideology",
      tweets ~ "Number of Tweets",
      web_ideology ~ "Website Ideo",
      seat ~ "Chamber"
    ),
    missing_text = "Missing"
  )

gtsave(as_gt(gt), "Out/candidate_summary_party.tex")

gt <- model_data |>
  filter(tweets > 25 & year > 2018) |>
  mutate(Democrat = ifelse(Democrat == "Yes", "Democratic", "Republican")) |>
  drop_na(Democrat) |>
  mutate(seat = ifelse(seat == "federal:house", "House", "Senate")) |>
  select(Democrat, seat, mean, Twitter_Ideo, PVI, tweets, web_ideology) |>
  tbl_summary(
    by = seat,
    label = list(
      mean ~ "Perceived Ideology",
      Twitter_Ideo ~ "Twitter Ideology",
      tweets ~ "Number of Tweets",
      web_ideology ~ "Website Ideo",
      Democrat ~ "Party"
    ),
    missing_text = "Missing"
  )

gtsave(as_gt(gt), "Out/candidate_summary_chamber.tex")


mod_all <- lm(
  mean ~
    Twitter_Ideo +
      year_factor +
      Democrat +
      seat +
      I(PVI / 100) +
      I((PVI / 100)^2),
  data = model_data,
  subset = tweets > 25 & year > 2018
)


mod_dem <- lm(
  mean ~ Twitter_Ideo + seat + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "Yes" & tweets > 25 & year > 2018
)

mod_rep <- lm(
  mean ~ Twitter_Ideo + seat + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "No" & tweets > 25 & year > 2018
)


coef(mod_all)
confint(mod_all)

coef(mod_rep)
confint(mod_rep)

coef(mod_dem)
confint(mod_dem)

gt <- modelsummary(
  list("All" = mod_all, "Democratic" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democratic",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gtsave(gt, "Out/coefficient_cand.tex")


#### Robust - Llama Categories ####

mod_all <- lm(
  mean ~
    Twitter_Ideo +
      year_factor +
      Democrat +
      seat +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      Macroeconomics +
      Campaigning +
      Health +
      Defense +
      Immigration +
      `Civil Rights`,
  data = model_data,
  subset = tweets > 25 & year > 2018
)


mod_dem <- lm(
  mean ~
    Twitter_Ideo +
      seat +
      year_factor +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      Macroeconomics +
      Campaigning +
      Health +
      Defense +
      Immigration +
      `Civil Rights`,
  data = model_data,
  subset = Democrat == "Yes" & tweets > 25 & year > 2018
)

mod_rep <- lm(
  mean ~
    Twitter_Ideo +
      seat +
      year_factor +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      Macroeconomics +
      Campaigning +
      Health +
      Defense +
      Immigration +
      `Civil Rights`,
  data = model_data,
  subset = Democrat == "No" & tweets > 25 & year > 2018
)


gt <- modelsummary(
  list("All" = mod_all, "Democratic" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "Twitter_Ideo" = "Messaging Ideology",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democratic",
    "Macroeconomics" = "Macroeconomics",
    "Campaigning" = "Campaigning",
    "Health" = "Health",
    "`Civil Rights`" = "Civil Rights",
    "Defense" = "Defense",
    "Immigration" = "Immigration",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gtsave(gt, "Out/coefficient_cand_llama.tex")


#### Robust - No filtering ####

mod_all <- lm(
  mean ~
    ideo_wout_classifier +
      year_factor +
      Democrat +
      seat +
      I(PVI / 100) +
      I((PVI / 100)^2),
  data = model_data,
  subset = tweets_wout_classifier > 25 & year > 2018
)


mod_dem <- lm(
  mean ~
    ideo_wout_classifier + seat + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "Yes" & tweets_wout_classifier > 25 & year > 2018
)

mod_rep <- lm(
  mean ~
    ideo_wout_classifier + seat + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "No" & tweets_wout_classifier > 25 & year > 2018
)


gt <- modelsummary(
  list("All" = mod_all, "Democratic" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "ideo_wout_classifier" = "Messaging Ideology w/out Classifier",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democratic",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gtsave(gt, "Out/coefficient_cand_tw_no_classifier.tex")


##### Robust: Candidates Web  #####
mod_all <- lm(
  mean ~
    web_ideology + year_factor + Democrat + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data
)


mod_dem <- lm(
  mean ~ web_ideology + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "Yes"
)

mod_rep <- lm(
  mean ~ web_ideology + year_factor + I(PVI / 100) + I((PVI / 100)^2),
  data = model_data,
  subset = Democrat == "No"
)


gt <- modelsummary(
  list("All" = mod_all, "Democratic" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "web_ideology" = "Messaging Ideology (Web)",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022",
    "DemocratYes" = "Democratic"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)


gtsave(gt, "Out/coefficient_cand_web.tex")


#### non-linear PVI Stuff ####

mod_dem <- gam(
  mean ~
    s(PVI, by = Twitter_Ideo) +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      seat +
      year_factor,
  data = model_data,
  subset = tweets > 25 & Democrat == "Yes" & year > 2018
)
par(mfrow = c(2, 2))
set.seed(711)
gam.check(mod_dem)
dev.off()
mod_rep <- gam(
  mean ~
    s(PVI, by = Twitter_Ideo) +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      seat +
      year_factor,
  data = model_data,
  subset = tweets > 25 & Democrat == "No" & year > 2018
)
par(mfrow = c(2, 2))
set.seed(711)
gam.check(mod_rep)
dev.off()

split_data <- function(plot_data) {
  group <- 1
  plot_data$group <- NA
  plot_data$group[1] <- 1

  for (ii in 2:(nrow(plot_data))) {
    if ((plot_data$p.value[ii] > 0.05) != (plot_data$p.value[ii - 1] > 0.05)) {
      print(plot_data$PVI[ii])
      # plot_data$group[ii] <- group
      # plot_data <- rbind(plot_data, plot_data[ii,])
      group <- group + 1
    }
    plot_data$group[ii] <- group
  }
  return(plot_data)
}

p1 <- slopes(
  mod_dem,
  variables = c("Twitter_Ideo"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Democratic Candidates") +
  scale_y_continuous("Effect of Messaging Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican      " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p1, axis = "x") +
  geom_histogram(
    data = mod_dem$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p1 <- insert_xaxis_grob(
  p1,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


p2 <- slopes(
  mod_rep,
  variables = c("Twitter_Ideo"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Republican Candidates") +
  scale_y_continuous("Effect of Messaging Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican       " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p2, axis = "x") +
  geom_histogram(
    data = mod_rep$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p2 <- insert_xaxis_grob(
  p2,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


plot_grid(p1, p2, ncol = 1)
ggsave("Out/cand_smooth.png", height = 10, width = 8, bg = "white", dpi = 600)

model_data |>
  filter(tweets > 25 & Democrat == "No") |>
  mutate(in_window = PVI > -7.0 & PVI < 6.3) |>
  summarize(mean(in_window, na.rm = T))

model_data |>
  filter(tweets > 25 & Democrat == "Yes") |>
  mutate(in_window = PVI > -8.1 & PVI < 20.9) |>
  summarize(mean(in_window, na.rm = T))


gt <- modelsummary(
  list("Democrat" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gtsave(gt, "Out/coefficient_cand_smooth.tex")


##### Robust - No Classifier #####

mod_dem <- gam(
  mean ~
    s(PVI, by = ideo_wout_classifier) +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      seat +
      year_factor,
  data = model_data,
  subset = tweets_wout_classifier > 25 & Democrat == "Yes" & year > 2018
)
par(mfrow = c(2, 2))
set.seed(711)
gam.check(mod_dem)
dev.off()
mod_rep <- gam(
  mean ~
    s(PVI, by = ideo_wout_classifier) +
      I(PVI / 100) +
      I((PVI / 100)^2) +
      seat +
      year_factor,
  data = model_data,
  subset = tweets_wout_classifier > 25 & Democrat == "No" & year > 2018
)
par(mfrow = c(2, 2))
set.seed(711)
gam.check(mod_rep)
dev.off()


p1 <- slopes(
  mod_dem,
  variables = c("ideo_wout_classifier"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Democratic Candidates") +
  scale_y_continuous("Effect of Messaging Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican      " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p1, axis = "x") +
  geom_histogram(
    data = mod_dem$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p1 <- insert_xaxis_grob(
  p1,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


p2 <- slopes(
  mod_rep,
  variables = c("ideo_wout_classifier"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Republican Candidates") +
  scale_y_continuous("Effect of Messaging Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican       " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p2, axis = "x") +
  geom_histogram(
    data = mod_rep$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p2 <- insert_xaxis_grob(
  p2,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


plot_grid(p1, p2, ncol = 1)
ggsave(
  "Out/cand_smooth_tw_no_classifier.png",
  height = 10,
  width = 8,
  bg = "white",
  dpi = 600
)

model_data |>
  filter(tweets > 25 & Democrat == "No") |>
  mutate(in_window = PVI > -7.0 & PVI < 6.3) |>
  summarize(mean(in_window, na.rm = T))

model_data |>
  filter(tweets > 25 & Democrat == "Yes") |>
  mutate(in_window = PVI > -8.1 & PVI < 20.9) |>
  summarize(mean(in_window, na.rm = T))


gt <- modelsummary(
  list("Democrat" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gtsave(gt, "Out/coefficient_cand_smooth_tw_no_classifier.tex")


##### With Web Ideology  #####

mod_dem <- gam(
  mean ~
    s(PVI, by = web_ideology) + I(PVI / 100) + I((PVI / 100)^2) + year_factor,
  data = model_data,
  subset = tweets > 25 & Democrat == "Yes"
)
gam.check(mod_dem)

mod_rep <- gam(
  mean ~
    s(PVI, by = web_ideology) + I(PVI / 100) + I((PVI / 100)^2) + year_factor,
  data = model_data,
  subset = tweets > 25 & Democrat == "No"
)
gam.check(mod_rep)


p1 <- slopes(
  mod_dem,
  variables = c("web_ideology"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Democratic Candidates") +
  scale_y_continuous("Effect of Web Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican      " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p1, axis = "x") +
  geom_histogram(
    data = mod_dem$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p1 <- insert_xaxis_grob(
  p1,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


p2 <- slopes(
  mod_rep,
  variables = c("web_ideology"),
  newdata = datagrid(PVI = seq(-30, 44, .1))
) |>
  split_data() |>
  ggplot(aes(
    x = PVI,
    y = estimate,
    ymin = conf.low,
    ymax = conf.high,
    group = group,
    color = p.value < 0.05,
    fill = p.value < 0.05
  )) +
  geom_line() +
  geom_ribbon(alpha = .25) +
  theme_pubr() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  theme(legend.position = "bottom") +
  scale_color_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +
  scale_fill_viridis_d(
    "p < 0.05",
    begin = .1,
    end = .9,
    labels = c("TRUE" = "Yes", "FALSE" = "No")
  ) +

  ggtitle("Republican Candidates") +
  scale_y_continuous("Effect of Web Ideology") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_x_continuous(expression(
    "More Republican       " %<-% "     PVI     " %->% "      More Democratic"
  ))

y_box <- axis_canvas(p2, axis = "x") +
  geom_histogram(
    data = mod_rep$model,
    aes(x = PVI, y = after_stat(count)),
    fill = "gray"
  )

p2 <- insert_xaxis_grob(
  p2,
  y_box,
  position = "bottom",
  height = grid::unit(0.1, "null")
) |>
  ggdraw()


plot_grid(p1, p2, ncol = 1)

ggsave(
  "Out/cand_smooth_web.png",
  height = 10,
  width = 8,
  bg = "white",
  dpi = 600
)

gt <- modelsummary(
  list("Democrat" = mod_dem, "Republican" = mod_rep),
  coef_map = c(
    "(Intercept)" = "Intercept",
    "I(PVI/100)" = "PVI",
    "I((PVI/100)^2)" = "PVI Squared",
    "seatfederal:senate" = "Senate",
    "DemocratYes" = "Democrat",
    "year_factor2014" = "2014",
    "year_factor2016" = "2016",
    "year_factor2018" = "2018",
    "year_factor2020" = "2020",
    "year_factor2022" = "2022"
  ),
  output = "gt",
  fmt = 2,
  gof_map = c("nobs", "r.squared", "adj.r.squared"),
  stars = c("*" = 0.05, "**" = 0.01)
)

gtsave(gt, "Out/coefficient_cand_smooth_web.tex")

#### Making Party Plots ####

plot_data <- read_csv("Data/plot_data.csv")


p1 <- plot_data |>
  ggplot(aes(x = year, y = mean, color = party)) +
  facet_wrap(~chamber) +
  geom_point() +
  geom_line(aes(group = candidate_id), alpha = .5) +
  scale_x_continuous("Year", breaks = seq(2010, 2022, 2)) +
  scale_y_continuous("Perceived Ideology", expand = c(0.1, 0)) +
  scale_color_manual(
    "Party",
    values = c(
      "Democrat" = "skyblue3",
      "Republican" = "orangered2",
      "Other" = "gray"
    )
  ) +
  theme_pubr(legend = "bottom")
ggsave(
  p1,
  filename = "Out/parties_overtime.png",
  height = 6,
  width = 10,
  units = "in",
  bg = "white",
  dpi = 600
)


p1 <- plot_data |>
  filter(tweets > 25) |>
  ggplot(aes(x = year, y = Twitter_Ideo, color = party)) +
  facet_wrap(~chamber) +
  geom_point() +
  geom_line(aes(group = candidate_id), alpha = .5) +
  scale_x_continuous("Year", breaks = seq(2010, 2022, 2)) +
  scale_y_continuous("Messaging Ideology") +
  scale_color_manual(
    "Party",
    values = c(
      "Democrat" = "skyblue3",
      "Republican" = "orangered2",
      "Other" = "gray"
    )
  ) +
  theme_pubr(legend = "bottom")
ggsave(
  p1,
  filename = "Out/parties_overtime_twitter.png",
  height = 6,
  width = 10,
  units = "in",
  bg = "white",
  dpi = 600
)


#### Correlation Plots ####

data <- read_csv("Data/incumbent_data.csv")

ggally_points_themed <- function(data, mapping, ...) {
  ggally_points(data, mapping) +
    theme_pubr() +
    theme(
      panel.grid.major.y = element_line(linetype = "dashed"),
      panel.grid.major.x = element_line(linetype = "dashed")
    )
}
ggally_barDiag_themed <- function(data, mapping, ...) {
  ggally_barDiag(data, mapping, alpha = .75, color = "black") +
    theme_pubr() +
    theme(
      panel.grid.major.y = element_line(linetype = "dashed"),
      panel.grid.major.x = element_line(linetype = "dashed")
    )
}

cols <- c(
  "mean",
  "Twitter_Ideo",
  "fb_ideo_avg",
  "nokken_poole_dim1",
  "web_ideology"
)
col_labels <- c("Perceptions", "Twitter", "Facebook", "Legislative", "Web")
for (ii in seq(2012, 2022, 2)) {
  p <- data |>
    filter(year == ii) |>

    # filter(chamber %in% c("House", "Senate")) |>
    mutate(
      Twitter_Ideo = ifelse(tweets > 25, Twitter_Ideo, NA),
      fb_ideo_avg = ifelse(Posts > 25, fb_ideo_avg, NA)
    ) |>
    mutate(
      party = case_match(
        party,
        200 ~ "Republican",
        100 ~ "Democrat",
        328 ~ "Democrat"
      )
    ) |>
    ggpairs(
      columns = if (ii < 2020) {
        cols[-5]
      } else {
        cols
      },
      columnLabels = if (ii < 2020) {
        col_labels[-5]
      } else {
        col_labels
      },
      mapping = aes(color = party),
      lower = list(continuous = ggally_points_themed),
      diag = list(continuous = ggally_barDiag_themed)
    )

  p <- p +
    scale_color_manual(
      "Party",
      values = c(
        "Democrat" = "skyblue3",
        "Republican" = "orangered2",
        "Other" = "gray"
      )
    ) +
    scale_fill_manual(
      "Party",
      values = c(
        "Democrat" = "skyblue3",
        "Republican" = "orangered2",
        "Other" = "gray"
      )
    )
  ggsave(
    filename = paste0("Out/incumbents_scatter_", ii, ".png"),
    p,
    height = 8,
    width = 8,
    bg = "white",
    dpi = 600
  )
}

##### Candidates #####

model_data <- read_csv("Data/candidate_data.csv") |>
  mutate(year_factor = factor(year))


cols <- c("mean", "Twitter_Ideo", "web_ideology")
col_labels <- c("Perceptions", "Twitter", "Web")
for (ii in seq(2018, 2022, 2)) {
  p <- model_data |>
    filter(year == ii) |>

    # filter(chamber %in% c("House", "Senate")) |>
    mutate(Twitter_Ideo = ifelse(tweets > 25, Twitter_Ideo, NA)) |>
    mutate(
      party = case_match(
        party,
        200 ~ "Republican",
        100 ~ "Democrat",
        328 ~ "Democrat"
      )
    ) |>
    ggpairs(
      columns = if (ii == 2018) {
        cols[-2]
      } else {
        cols
      },
      columnLabels = if (ii == 2018) {
        col_labels[-2]
      } else {
        col_labels
      },
      mapping = aes(color = party),
      lower = list(continuous = ggally_points_themed),
      diag = list(continuous = ggally_barDiag_themed)
    )

  p <- p +
    scale_color_manual(
      "Party",
      values = c(
        "Democrat" = "skyblue3",
        "Republican" = "orangered2",
        "Other" = "gray"
      )
    ) +
    scale_fill_manual(
      "Party",
      values = c(
        "Democrat" = "skyblue3",
        "Republican" = "orangered2",
        "Other" = "gray"
      )
    )
  ggsave(
    filename = paste0("Out/candidates_scatter_", ii, ".png"),
    p,
    height = 8,
    width = 8,
    bg = "white"
  )
}
